* Start from an empty session (data, programs, matrices, etc.)
clear all

* Root folder of the replication package; adjust this line to the local setup
global path "~/Dropbox/IT_Revolution/Replication_package/JPE submission"

* Sub-folders derived from the root: raw inputs, cleaned data, and output
global path_rawdata   "${path}/Raw_data"
global path_cleandata "${path}/Clean_data"
global path_output    "${path}/Output"

********************************************************************************
*** ICT (1980-2019)
********************************************************************************

*** Step 1: Data cleaning
********************************************************************************
* Loads the raw micro data, pools survey years, restricts the sample by age and
* labor-force status, attaches the occupation crosswalk, builds wage, weeks,
* hours, full-time and demographic variables, and saves the cleaned file used
* by all later steps.

* -use- accepts -clear- (not -replace-) to discard the data currently in memory
use "$path_rawdata/raw_data_impus_ICT.dta", clear

*Adjust ACS years: 2009/2011 are pooled into 2010 and 2017/2019 into 2018
replace year = 2010 if year == 2009 | year == 2011
replace year = 2018 if year == 2017 | year == 2019

*select sample aged 16-64 and drop observations with labforce == 1
*(the restriction on sex is applied later, in Steps 2 and 3)
* NOTE(review): labforce == 1 presumably codes "not in the labor force" -- confirm against the codebook
drop if age < 16 | age > 64
drop if labforce == 1

*create occ variable: keep occ1990 codes below 990 and attach the crosswalk,
*keeping only observations matched in both files
* NOTE(review): the crosswalk presumably supplies occ1990dd, which later steps rely on
keep if occ1990 < 990
rename occ1990 occ
merge m:1 occ using "$path_rawdata/occ1990_occ1990dd.dta", keep(match) nogenerate

*clean missing wage/income data: codes >= 999998 and non-positive values become missing
replace incwage = . if incwage >= 999998 | incwage <= 0
gen lincwage = log(incwage)

*create weeks worked per year from the intervalled variable wkswork2
* NOTE(review): only wkswork2 codes 4-6 are assigned a value; for every other
* code weeks stays missing, so full_time below is missing (not 0) for those
* observations and they are NOT removed by later "full_time == 0" filters -- confirm intended
gen weeks = .
replace weeks = 43.5 	if wkswork2 == 4
replace weeks = 48.5 	if wkswork2 == 5
replace weeks = 51 		if wkswork2 == 6

*create hours worked per week (zero usual hours treated as missing)
gen hours = uhrswork
replace hours = . if uhrswork == 0

*create full-time dummy: at least 40 weeks and at least 35 hours
*(missing compares as larger than any number, so it is reset explicitly below)
gen full_time = 0
replace full_time = 1 if (weeks >=40 & hours >=35)
replace full_time = . if weeks ==.|hours ==.

*generate dummy variables
gen white = race == 1
gen self_employed = classwkr == 1
gen USborn = bpl < 150

*generate employment variable for aggregation
*each observation counts 1/3 from 2010 onwards, where each year bin pools
*three survey years (see the year recoding above)
gen emp = 1
replace emp = 1/3 if year >= 2010

save "$path_cleandata/data_clean_1980_2010.dta", replace

*** Step 2: Create occupation exposure to ICT
********************************************************************************
* Builds one record per occupation (occ1990dd) holding the perwt-weighted mean
* of the industry-level variable "value" among the 1980 sample, then attaches
* the occupation task measures.
use "$path_cleandata/data_clean_1980_2010.dta", clear

* 1980 sample: sex == 1, excluding classwkr == 1 and full_time == 0
* (observations with missing classwkr or full_time are retained)
keep if sex == 1 & classwkr != 1 & full_time != 0 & year == 1980

* attach the industry-level ICT shares; all merge outcomes are kept
merge m:1 ind1990 using "$path_cleandata/ICT_shares_final", nogenerate

* occupation exposure = weighted average of "value" across its workers
collapse (mean) ict=value [aw = perwt], by(occ1990dd) fast

* add task measures, discarding occupations found only in the task file
merge m:1 occ1990dd using "$path_rawdata/occ1990dd_task_alm.dta", keep(master match) nogenerate

save "$path_cleandata/exposureICT.dta", replace

*** Step 3: Generate sample by occupation and group
********************************************************************************
* For each sample specification, aggregates the cleaned micro data to
* occupation x year cells for all workers (group 0) and for workers at or
* below / above an age cutoff (groups 1 / 2), reshapes the result wide by year,
* attaches the ICT exposure file, and saves one dataset per spec and cutoff.
forvalues spec = 1/3 {
	use "$path_cleandata/data_clean_1980_2010.dta", clear

	* every specification is restricted to sex == 1
	keep if sex == 1

	* spec 1: age cutoffs 25 through 35, no further restriction
	* spec 2: USborn == 1 only, cutoff 29
	* spec 3: white == 1 only, cutoff 29
	if `spec' == 1 {
		local age_range 25/35
	}
	else if `spec' == 2 {
		keep if USborn == 1
		local age_range 29
	}
	else {
		keep if white == 1
		local age_range 29
	}

	* emp2 undoes the 1/3 scaling of emp for years >= 2010; it is the
	* denominator of the average wage computed below
	gen emp2 = cond(year >= 2010, emp*3, emp)

	* income variables and emp2 are blanked for observations with full_time == 0
	foreach v of varlist inc* emp2 {
		replace `v' = . if full_time == 0
	}

	* aggregation shared by the all-worker and the by-age-group datasets
	local stats (sum) emp emp2 wage_agg=incwage (mean) hours full_time self_employed
	local cells occ1990dd year group

	* all workers: group 0, written to an intermediate file
	preserve
		gen group = 0

		collapse `stats' [aw = perwt], by(`cells') fast

		save "$path_cleandata/data_occ_1980_2010_s`spec'_all.dta", replace
	restore

	* one output dataset per age cutoff
	foreach cutoff of numlist `age_range' {

	preserve
		* group 1: age at or below the cutoff; group 2: above it
		gen group = cond(age <= `cutoff', 1, 2)

		collapse `stats' [aw = perwt], by(`cells') fast

		* stack the all-worker cells (group 0) underneath
		append using "$path_cleandata/data_occ_1980_2010_s`spec'_all.dta"

		* log average wage = log(summed incwage / summed emp2)
		gen lavgwage = log( wage_agg/emp2	)
		drop wage_agg emp2

		* one row per occupation x group, with year-suffixed columns
		reshape wide emp-lavgwage, i(occ1990dd group) j(year)
		merge m:1 occ1990dd using "$path_cleandata/exposureICT.dta", nogenerate

		save "$path_cleandata/data_occ_1980_2010_s`spec'_a`cutoff'.dta", replace
	restore
	}

	* the intermediate all-worker file is no longer needed
	erase "$path_cleandata/data_occ_1980_2010_s`spec'_all.dta"
}

*** Step 4: Bilateral distance between occupations
*******************************************************************************
* Forms every ordered pair of distinct occupations and computes d_ent, the sum
* over the three task shares of share * log(share / share_2), where the _2
* suffix marks the second occupation of the pair.
use "$path_cleandata/data_occ_1980_2010_s1_a29.dta", clear

* log employment change 1980-2018; missing unless both levels are positive
gen change_emp4 = log(emp2018/emp1980)

*select occupations with positive employment in 1980 and 2018
*an occupation is kept only if three of its rows have a non-missing change
gen has_change = !missing(change_emp4)
egen n_valid = total(has_change), by(occ1990dd)
keep if n_valid == 3

* continue with the all-worker rows only
keep if group==0
drop group

* rescale the three task measures so that they sum to one within occupation
gen denom = task_abstract+task_routine+task_manual

foreach t of varlist task_abstract task_routine task_manual{
	replace `t' = `t'/denom
}

drop denom

*standardized exposure measures
egen exposure_a = std(task_abstract)
egen exposure_ict = std(ict)

keep emp* exp* task* occ1990dd

* constant key used to form all pairwise combinations in joinby below
gen merge_var = 1

* write a copy in which every variable except the key carries the suffix _2
preserve

	foreach v of varlist emp* exposure* task* occ1990dd{
		rename `v' `v'_2
	}

	save "$path_cleandata/temp", replace

restore

* cross every occupation with every occupation, then remove self-pairs
joinby merge_var using "$path_cleandata/temp"

drop if occ1990dd==occ1990dd_2

drop merge_var

* accumulate share * log(share / share_2) task by task
gen d_ent = 0
foreach t of varlist  task_abstract task_manual task_routine{
	replace d_ent = d_ent + `t'*log(`t'/`t'_2)
}
drop task*

save "$path_cleandata/temp_distance_all_later.dta", replace

*** Step 5: Compute average distance metric
********************************************************************************
* Averages the pairwise distance d_ent within each second-listed occupation
* (occ1990dd_2), weighting each pair by emp1980 of the first-listed
* occupation, and attaches the result to the cutoff-29 dataset of every spec.
use "$path_cleandata/temp_distance_all_later.dta", clear

collapse (mean) ad_ent=d_ent [aw = emp1980], by(occ1990dd_2)

* rename the key so it matches the occupation datasets
rename occ1990dd_2 occ1990dd

save "$path_cleandata/distance_temp.dta", replace

forvalues spec = 1/3 {
	use "$path_cleandata/data_occ_1980_2010_s`spec'_a29.dta", clear

	* all merge outcomes are kept; only the _merge indicator is suppressed
	merge m:1 occ1990dd using  "$path_cleandata/distance_temp.dta", nogenerate

	save "$path_cleandata/data_occ_1980_2010_s`spec'_a29distance.dta", replace
}

* remove the intermediate files written in Steps 4 and 5
erase "$path_cleandata/distance_temp.dta"
erase "$path_cleandata/temp.dta"
